*************************************************************************************************
/*																								
		Purpose: Bring in 2010-2018 GSS and assign income scores.				
		Creates: GSS_10to18_analysis.dta										
*/																								
*************************************************************************************************

clear all
set more off
set maxvar 10000

cd "$Mydirectory1/1_DataSources/GSS/"

* Bring in GSS data
use "./input/GSS7218_R1.dta", clear //download from GSS website
sort year id

* Keep the 2010-2018 survey waves. The file header and the output file name
* (GSS_10to18) both say 2010 is part of the sample, so the comparison must be
* inclusive; a strict "year>2010" silently drops the 2010 wave.
* NOTE(review): confirm that excluding 2010 was not intentional.
keep if year>=2010

* Obtain CPI. preserve/restore keeps the GSS data in memory intact while the
* CPI do-file runs (presumably it builds ../CPI/CPI_deflator.dta, which is
* merged in further below -- verify against that do-file).
preserve
quietly run "../CPI/CPI_deflator.do"
restore

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*********************
** OCCUPATIONS: RESPONDENT, SPOUSE, MOTHER, FATHER
*********************

cd "$Mydirectory1/1_DataSources/GSS/"

/* Using a GSS-provided crosswalk, convert the 2010 census occupation codes of
   the respondent (occ10), spouse (spocc10), mother (maocc10) and father
   (paocc10) into the ~30 ANES occupation categories.

   Variables created, in this order:
       occR_10, occSP_10, motheroccej_10, fatheroccej10
*/

ren *, lower

foreach var of varlist occ10 spocc10 maocc10 paocc10 {

	* Self-employment indicator and output variable name for this occupation variable
	if "`var'"=="occ10" {
		local selfemp wrkslf
		local coarse  occR_10
	}
	if "`var'"=="spocc10" {
		local selfemp spwrkslf
		local coarse  occSP_10
	}
	if "`var'"=="maocc10" {
		local selfemp mawrkslf
		local coarse  motheroccej_10
	}
	if "`var'"=="paocc10" {
		local selfemp pawrkslf
		local coarse  fatheroccej10
	}

	* The crosswalk is keyed on census2010, so temporarily borrow that name
	rename `var' census2010
	replace census2010=. if census2010==.i | census2010==.n | census2010==.d | census2010==9997

	* Crosswalk the 2010 occupations to the ANES ones (merge adds one variable: fatheroccej_2010)
	merge m:1 census2010 using "../Crosswalks/Crosswalk_2010Census_toANES.dta"

	* Every unmatched master observation must have a missing occupation code
	tab census2010 if _merge==1, nol
	assert census2010==. if _merge==1
	drop if _merge==2
	rename fatheroccej_2010 fatheroccej

	* Diagnostics: which census codes end up without a coarsened category
	tab census2010 if fatheroccej==., m nol
	count if _merge==1 & fatheroccej==.
	drop _merge

	* Category 28 (managers, per the original author's note) is recoded to 21
	* when the person is self-employed
	replace fatheroccej=21 if fatheroccej==28 & `selfemp'==1

	* Give the coarsened variable its final name and restore the original
	* name of the census occupation variable
	rename fatheroccej `coarse'
	rename census2010 `var'
}

* Label by full variable name; the earlier "label var fatheroccej" only worked
* through variable-abbreviation matching after the rename to fatheroccej10.
label var fatheroccej10 "Father's occupation, coarsened"
label var occSP_10 "Spouse occupation, coarsened"
label var occR_10 "Resp. occupation, coarsened"
label var motheroccej_10 "Mother's occupation, coarsened"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*********************
** TOTAL FAMILY INCOME 
*********************
	
* Assign a dollar value to each category of the two binned family income
* variables: income06 (used for survey years 2006-2014) and income16 (used
* for survey years after 2014). Categories up to 24 share the same values.
	foreach wave in 06 16 {
		local src income`wave'

		gen `src'_bins = .
		replace `src'_bins = 0.75*10000 if inrange(`src', 1, 8)   //<10k
		replace `src'_bins = 12500      if inlist(`src', 9, 10)   //10-15k
		replace `src'_bins = 17500      if inlist(`src', 11, 12)  //15-20
		replace `src'_bins = 22500      if inlist(`src', 13, 14)  //20-25
		replace `src'_bins = 30000      if inlist(`src', 15, 16)  //25-35
		replace `src'_bins = 42500      if inlist(`src', 17, 18)  //35-50
		replace `src'_bins = 62500      if inlist(`src', 19, 20)  //50-75
		replace `src'_bins = 92500      if inlist(`src', 21, 22)  //75-110
		replace `src'_bins = 130000     if inlist(`src', 23, 24)  //110-150
	}

* Upper categories differ between the two variables
	replace income06_bins = 1.25*150000 if income06==25 //150k+
	replace income16_bins = 160000      if income16==25 //150-170
	replace income16_bins = 1.25*170000 if income16==26 //170k+

* Each binned variable is only valid for its own survey years
	replace income06_bins = . if inlist(year, 2016, 2018)
	replace income16_bins = . if year<2016

* Blend the two binned variables into *one* family income variable
	gen faminc = income06_bins if inrange(year, 2006, 2014)
	replace faminc = income16_bins if year>2014
	label var faminc "Family income using bins"

/*
	Note: The "_son" suffix on the variables below only exists so that the
	      names match those used in other datasets. Every respondent, male
	      or female, receives a value.

	Each flag equals 1 when the respondent's binned income sits at the
	lowest (bottom) or highest (top) value of the income variable that
	applies to the survey year, 0 otherwise, and missing when the binned
	income is missing.
*/
	local bottom_06 = 0.75*10000
	local bottom_16 = 0.75*10000
	local top_06    = 1.25*150000
	local top_16    = 1.25*170000

	foreach tail in bottom top {

		gen `tail'coded_son = .

		* Survey years 2006-2014: based on income06_bins
		replace `tail'coded_son = (income06_bins==``tail'_06') if inrange(year, 2006, 2014) & income06_bins<.
		tab `tail'coded_son if inrange(year, 2006, 2014), m
		tab income06_bins, m
		tab `tail'coded_son, m

		* Survey years after 2014: based on income16_bins
		replace `tail'coded_son = (income16_bins==``tail'_16') if year>2014 & income16_bins<.
		tab `tail'coded_son if year>2014, m
		tab income16_bins, m
		tab `tail'coded_son, m
	}
	
* Convert faminc to real dollars (1950 dollars, per the variable label) with
* the CPI deflator. CPI source: https://data.bls.gov/timeseries/CUUR0000SA0
* Income is reported for the year before the survey year, so the deflator is
* matched on year-1.
	gen year_CPI = year - 1
	merge m:1 year_CPI using ../CPI/CPI_deflator.dta
	keep if _merge!=2   // discard CPI years with no GSS respondents
	drop _merge

	gen fam_inc_real = faminc * deflator
	label variable fam_inc_real "Family income (bins), in 1950 dollars"

	gen lnfaminc = log(fam_inc_real)
	label variable lnfaminc "Logged family income (bins)"

	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*******************
***** RESTRICTING SAMPLE
*******************

* Keep the identifiers, age, and every variable constructed above. The range
* occR_10-lnfaminc relies on the constructed variables sitting contiguously
* at the end of the dataset, in the order they were created.
	keep year id age occR_10-lnfaminc
	drop year_CPI income*_bins CPI*

* Analysis sample: respondents aged 30 to 50 (missing ages are dropped)
	keep if inrange(age, 30, 50)

	compress
	save ./output/GSS_10to18_analysis.dta, replace
